* Session setup: clear the session and configure Stata before the build runs.
clear all
* Do not pause for --more-- while output scrolls.
set more off
* NOTE(review): Stata 12+ manages memory automatically, so this setting is
* presumably ignored on current installations -- confirm before relying on it.
set mem 10000000
set matsize 10000
* Interpret the commands that follow under Stata 13 syntax rules.
version 13

****************************************************************** 
*** Build File to Process Raw Census 2001 PCA ********************
****************************************************************** 

** Set file paths
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

** 2001 PCA (this is the master dataset)

* Clean and rename some variables
* Loads the raw all-India 2001 PCA file, attaches cleaned state / district /
* block names to every row, keeps only village-level rows, and assigns a
* sequential village identifier (pca01_id).
{
use "$pca/pca_census01_india.dta", clear

* The raw location codes take *_code names so that the plain names
* (state, district, block) are free for the text names built below.
rename (state district tahsil town_vill) (st_code dt_code bk_code vi_code)

* State name: taken from the STATE-level row and spread to all rows that
* share the same state code.
generate temp_state = cond(level=="STATE", upper(trim(itrim(name))), "")
egen state = mode(temp_state), by(st_code)

* District name: same approach, with asterisks stripped from the raw name.
generate temp_dist = cond(level=="DISTRICT", upper(trim(itrim(subinstr(name,"*","",.)))), "")
egen district = mode(temp_dist), by(st_code dt_code)

* Block name: the sub-district level goes by several labels in the raw file.
generate temp_block = cond(inlist(level,"C.D.BLOCK","CIRCLE","DEVELOPMENT BLOCK","POLICE STATION","R.D.BLOCK","SUB DIVISION","TALUK","TEHSIL"), upper(trim(itrim(name))), "")
egen block = mode(temp_block), by(st_code dt_code bk_code)

* Normalise punctuation in block names. The substitutions are applied one
* at a time, in this order, because later ones act on the output of earlier ones.
replace block = subinstr(block, "*", "", .)
replace block = subinstr(block, " -", " ", .)
replace block = subinstr(block, "- ", " ", .)
replace block = subinstr(block, "( ", "(", .)
replace block = subinstr(block, " )", ")", .)
replace block = subinstr(block, "-I", " I", .)
replace block = subinstr(block, "-1", " I", .)

* Remove administrative-type tags from block names, then tidy whitespace.
foreach tag in "(T)" "(S.T)" "(M)" "C.D.BLOCK" "SUB-DIV." "SUB-DIVISION" "(P)" "CIRCLE" "CD BLOCK" {
    replace block = subinstr(block, "`tag'", "", .)
}
replace block = trim(itrim(block))

order st_code state dt_code district bk_code block

* Every village row must have picked up all three names.
assert state!="" & district!="" & block!="" if level=="VILLAGE"
keep if level=="VILLAGE"

rename name village
replace village = upper(trim(itrim(village)))
*only 14 villages in Bihar with missing village code
*only 6 villages with missing name

* Manual harmonisation of specific state and block names.
replace state = "MANIPUR" if regexm(state,"MANIPUR")
replace state = "UTTARAKHAND" if state=="UTTARANCHAL"
replace block = "SIRPUR (T)" if block=="SIRPUR" & district=="ADILABAD" & bk_code==11

rename bk_code bk_code_pca

* Remove exact duplicate rows before numbering the villages.
duplicates drop
generate pca01_id = _n
order pca01_id
}

* Redefine variables as percentages
* Each new variable is a raw count divided by the matching population total
* (tot_p, tot_m or tot_f), i.e. a share of that population.
* Variables are created in a fixed order: pct_*, lit_*, work_*, then the
* main-worker and marginal-worker families, each as person / male / female.
{
* Population sub-groups as a share of total population
foreach grp in 06 sc st {
    generate pct_`grp' = p_`grp'/tot_p
}

* Literacy and overall work participation
foreach sx in p m f {
    generate lit_`sx' = `sx'_lit/tot_`sx'
}
foreach sx in p m f {
    generate work_`sx' = tot_work_`sx'/tot_`sx'
}

* Main and marginal workers: overall share first, then by category
* (cl, al, hh, ot), each for p / m / f.
foreach wk in main marg {
    foreach sx in p m f {
        generate work_`wk'_`sx' = `wk'work_`sx'/tot_`sx'
    }
    foreach cat in cl al hh ot {
        foreach sx in p m f {
            generate work_`wk'_`cat'_`sx' = `wk'_`cat'_`sx'/tot_`sx'
        }
    }
}
}

* Label new variables
* Attaches a descriptive variable label to each derived share variable.
* Suffix convention in the variable names: _p = persons, _m = male, _f = female.
* Fixes label typos: "cultiavtors" -> "cultivators" and "mairg" -> "marg".
{
la var pct_06         "% pop 0-6 years old"
la var pct_sc         "% pop scheduled caste"
la var pct_st         "% pop scheduled tribe"
la var lit_p          "% pop literate"
la var lit_m          "% male pop literate"
la var lit_f          "% female pop literate"
la var work_p         "% pop workers"
la var work_m         "% male pop workers"
la var work_f         "% female pop workers"

* Main workers
la var work_main_p    "% pop main workers"
la var work_main_m    "% male pop main workers"
la var work_main_f    "% female pop main workers"
la var work_main_cl_p "% pop main cultivators"
la var work_main_cl_m "% male pop main cultivators"
la var work_main_cl_f "% female pop main cultivators"
la var work_main_al_p "% pop main agri-laborers"
la var work_main_al_m "% male pop main agri-laborers"
la var work_main_al_f "% female pop main agri-laborers"
la var work_main_hh_p "% pop main household industry workers"
la var work_main_hh_m "% male pop main household industry workers"
la var work_main_hh_f "% female pop main household industry workers"
la var work_main_ot_p "% pop main other workers"
la var work_main_ot_m "% male pop main other workers"
la var work_main_ot_f "% female pop main other workers"

* Marginal workers
la var work_marg_p    "% pop marg workers"
la var work_marg_m    "% male pop marg workers"
la var work_marg_f    "% female pop marg workers"
la var work_marg_cl_p "% pop marg cultivators"
la var work_marg_cl_m "% male pop marg cultivators"
la var work_marg_cl_f "% female pop marg cultivators"
la var work_marg_al_p "% pop marg agri-laborers"
la var work_marg_al_m "% male pop marg agri-laborers"
la var work_marg_al_f "% female pop marg agri-laborers"
la var work_marg_hh_p "% pop marg household industry workers"
la var work_marg_hh_m "% male pop marg household industry workers"
la var work_marg_hh_f "% female pop marg household industry workers"
la var work_marg_ot_p "% pop marg other workers"
la var work_marg_ot_m "% male pop marg other workers"
la var work_marg_ot_f "% female pop marg other workers"
}


* Rename and save as 2001 PCA dataset
* Writes two files:
*   $pca/pca_census01.dta        identifiers, names and all *_01 census variables
*   $pca/pca_census01_names.dta  the same rows without the *_01 census variables
{
* Tag every variable from p_06 through work_marg_ot_f (dataset order) with
* the census-year suffix. The derived shares sit inside this range, so
* pct_06 becomes pct_06_01, and so on.
foreach v of varlist p_06-work_marg_ot_f {
  rename `v' `v'_01
}
la var pca01_id "2001 PCA id"
la var st_code "2001 state code"
la var dt_code "2001 district code"
la var bk_code_pca "2001 block code"
la var vi_code "2001 village code"
la var state "State name"
la var district "District name"
la var block "Block name"
la var village "Village name"

drop if inlist(st_code,4,7,25,26,30,31,34,35)  // drop non-RGGVY states
drop temp* level eb ward tru
duplicates drop
compress
save "$pca/pca_census01.dta", replace

* Names-only file: remove the suffixed census variables.
* The original line also listed "pct_06-wor". pct_06 no longer exists at this
* point (the loop above renamed it to pct_06_01) and "wor" is not a complete
* variable name, so that drop would stop with an error before the save.
* The single range below already covers every renamed variable.
drop p_06_01-work_marg_ot_f_01
save "$pca/pca_census01_names.dta", replace
}

********************************************************************************
********************************************************************************

